Author

Colin J Lee

Code
library(lubridate)
library(Amelia)
library(plyr)
library(psych)
library(lavaan)
library(tidyverse)

set.seed(420)

wd <- getwd()
setwd(wd)
Code
ipcs_codebook <- sprintf("1. codebook-2022-06-07.xlsx", wd) %>%
  readxl::read_xlsx(., sheet = "IPCS"); ipcs_codebook
Warning in sprintf("1. codebook-2022-06-07.xlsx", wd): one argument not used by
format '1. codebook-2022-06-07.xlsx'
New names:
• `` -> `...10`
• `` -> `...11`
• `` -> `...12`
• `` -> `...13`
• `` -> `...14`
• `` -> `...15`
Code
# State Data  
load(sprintf("%s/Data/scrubbed_data_W1.RData", wd))
Code
### how many participants initially in dataset?
ipcs_states %>%
  group_by(SID) %>%
  distinct(SID) %>%
  nrow() #199 
[1] 199
Code
# Merge E_Cpmn and A_Cmpn cols (due to typo) and remove E_Cmpn
ipcs_states$A_Cmpn <- coalesce(ipcs_states$A_Cmpn, ipcs_states$E_Cmpn)
ipcs_states <- ipcs_states[,-21]


# Min 25; 153 participants
ipcs_states <- ipcs_states %>%
  group_by(SID) %>%
  filter(n() >= 25) %>% #199 participants to 153
  arrange(SID, lubridate::ymd_hm(Date)) %>%
  mutate(all_beeps = seq(1, n(), 1)) %>%
  ungroup() %>%
  mutate(SID = as.numeric(SID))

length(unique(ipcs_states$SID))
[1] 153
Code
ipcs_profiles_id <- unique(ipcs_states$SID)

How much missingness of facets before imputation?

Code
ipcs_miss <- ipcs_states %>% select(N_Depr:A_Cmpn)
sum(is.na(ipcs_miss))/(8095*15)
[1] 0.3626848
Code
#36.2%
Code
ipcs_states_mi <- data.frame(unclass(ipcs_states %>% select(-Date, -Hour, -Minute, -`Hour Block 1`, -HourBlock)))
ipcs_states_mi <- amelia(ipcs_states_mi, m = 1, ts = "all_beeps", cs = "SID")$imputations[[1]] %>%
  as_tibble() %>%
  full_join(ipcs_states %>% select(SID, Date, all_beeps)) %>%
  select(-all_beeps); ipcs_states_mi
-- Imputation 1 --

  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20
 21 22 23 24 25
Joining with `by = join_by(SID, all_beeps)`
Code
# check ranges after imputation
ipcs_states_range <- data.frame(sapply(ipcs_states_mi, function(x) range(x, na.rm = TRUE)))
ipcs_states_range <- data.frame(t(ipcs_states_range)) #outside 1 and 5
ipcs_states_range
Code
# look at state values post MI
ipcs_pers <- ipcs_states_mi[,2:16]
describe(ipcs_pers) # min and max values outside of 1-5 range
Code
# restrict range of state values after MI
ipcs_pers[ ipcs_pers > 5 ] <- 5
ipcs_pers[ ipcs_pers < 1] <- 1

### Same for situation
ipcs_sit <- ipcs_states_mi[,17:24]
ipcs_sit[ ipcs_sit > 3 ] <- 3
ipcs_sit[ ipcs_sit < 1 ] <- 1

#recombine
ipcs_states_mi <- cbind(ipcs_states_mi$SID, ipcs_pers, ipcs_sit, ipcs_states_mi$Date)
colnames(ipcs_states_mi)[1] <- "SID"
colnames(ipcs_states_mi)[25] <- "Date"
Code
ipcs_states_long <- ipcs_states_mi %>%
  pivot_longer(names_to = c("name", "item")
               , names_sep = "_"
               , values_to = "value"
               , cols = c(-SID, -Date)
               , values_drop_na = T) %>%
  left_join(ipcs_codebook %>% filter(level == "state") %>% select(category, name, item) %>% distinct())
Joining with `by = join_by(name, item)`
Code
# now that codebook is joined, make it wide
ipcs_wide <- ipcs_states_long %>%
  filter(category == "pers") %>%
  distinct() %>%
  pivot_wider(
    names_from = c("category", "name", "item")
    , names_sep = "_"
    , values_from = "value"
  ) %>%
  full_join(
    ipcs_states_long %>%
      distinct() %>%
      filter(category == "sit") %>%
      select(-item) %>%
      pivot_wider(
        names_from = c("category", "name")
        , names_sep = "_"
        , values_from = "value"
      )
  ) 
Joining with `by = join_by(SID, Date)`
Code
# order personality variables EACNO
ipcs_wide_order <- ipcs_wide %>%
  select("pers_E_Scblty", "pers_E_EnerLev", "pers_E_Assert", "pers_A_Trust", "pers_A_Rspct", "pers_A_Cmpn", "pers_C_Prdctv", "pers_C_Rspnbl", "pers_C_Org", "pers_N_Depr", "pers_N_Anxty", "pers_N_EmoVol", "pers_O_IntCur", "pers_O_AesSens", "pers_O_CrtvImag")
colnames(ipcs_wide)
 [1] "SID"             "Date"            "pers_N_Depr"     "pers_E_Scblty"  
 [5] "pers_C_Prdctv"   "pers_O_IntCur"   "pers_N_Anxty"    "pers_O_CrtvImag"
 [9] "pers_A_Rspct"    "pers_E_EnerLev"  "pers_A_Trust"    "pers_O_AesSens" 
[13] "pers_E_Assert"   "pers_C_Rspnbl"   "pers_N_EmoVol"   "pers_C_Org"     
[17] "pers_A_Cmpn"     "sit_Mating"      "sit_pOsitivity"  "sit_Intellect"  
[21] "sit_Negativity"  "sit_Adversity"   "sit_Sociability" "sit_Duty"       
[25] "sit_Deception"  
Code
#order situation variables DIAMONDS
ipcs_wide_sit_order <- ipcs_wide %>%
  select(sit_Duty, sit_Intellect, sit_Adversity, sit_Mating, sit_pOsitivity, sit_Negativity, sit_Deception, sit_Sociability)

ipcs_wide <- cbind(ipcs_wide$SID, ipcs_wide$Date, ipcs_wide_order, ipcs_wide_sit_order)

colnames(ipcs_wide) <- c("SID", "Date", "Sociability", "EnergyLevel", "Assertive", "Trust", "Respect", "Compassion", "Productivity", "Responsibility", "Organization", "Depression", "Anxiety", "EmotionalVol", "IntCuriosity", "AesthSense", "CrtvImagination", "Duty", "Intellect","Adversity", "Mating", "pOsitivity", "Negativity", "Deception", "Sociality")

Transform facets into percent of maximum possible scores

Code
pomp <- function(x, na) (x - min(x, na.rm = na))/(max(x, na.rm = na) - min(x, na.rm = na))*100

ipcs_wide[,3:17] <- ipcs_wide %>%
  select(Sociability:CrtvImagination) %>%
  mutate(
    Sociability = pomp(Sociability, TRUE)
    , EnergyLevel =  pomp(EnergyLevel, TRUE)
    , Assertive =  pomp(Assertive, TRUE)
    , Trust =  pomp(Trust, TRUE)
    , Respect =  pomp(Respect, TRUE)
    , Compassion =  pomp(Compassion, TRUE)
    , Productivity =  pomp(Productivity, TRUE)
    , Responsibility =  pomp(Responsibility, TRUE)
    , Organization =  pomp(Organization, TRUE)
    , Depression =  pomp(Depression, TRUE)
    , Anxiety =  pomp(Anxiety, TRUE)
    , EmotionalVol =  pomp(EmotionalVol, TRUE)
    , IntCuriosity =  pomp(IntCuriosity, TRUE)
    , AesthSense =  pomp(AesthSense, TRUE)
    , CrtvImagination =  pomp(CrtvImagination, TRUE)
  )

describe(ipcs_wide) #cool

Fix some stuff and check if there are NAs

Code
# Date as POSIXct
ipcs_wide$Date <- ymd_hm(ipcs_wide$Date)
str(ipcs_wide)
'data.frame':   8095 obs. of  25 variables:
 $ SID            : num  1 1 1 1 1 1 1 1 1 1 ...
 $ Date           : POSIXct, format: "2018-10-22 15:23:00" "2018-10-22 23:23:00" ...
 $ Sociability    : num  76.5 65 20.7 35.6 72 ...
 $ EnergyLevel    : num  75 50 75 75 75 ...
 $ Assertive      : num  75 75 25 25 64.5 ...
 $ Trust          : num  50 25 43.7 75 67.5 ...
 $ Respect        : num  100 50 62.5 75 75 ...
 $ Compassion     : num  75.7 55.1 71.8 53.1 100 ...
 $ Productivity   : num  75 0 13.1 25 75 ...
 $ Responsibility : num  54 90.4 25 25 88.3 ...
 $ Organization   : num  75 100 26.3 82.4 57.9 ...
 $ Depression     : num  25 50 50 50 25 50 50 50 50 50 ...
 $ Anxiety        : num  41.7 100 62.5 75 25 ...
 $ EmotionalVol   : num  58.8 100 72.5 50 21.1 ...
 $ IntCuriosity   : num  50 75 25 17.3 62.9 ...
 $ AesthSense     : num  31.7 100 49.8 44.2 100 ...
 $ CrtvImagination: num  25 18.4 25 25 75 ...
 $ Duty           : num  3 3 3 2 1 3 3 3 3 3 ...
 $ Intellect      : num  2 2 2 3 3 2 2 3 3 3 ...
 $ Adversity      : num  1 1 1 1 1 3 2 3 2 2 ...
 $ Mating         : num  1 1 1 1 2 1 2 1 1 3 ...
 $ pOsitivity     : num  3 1 2 1 1 2 2 1 2 3 ...
 $ Negativity     : num  1 2 2 3 1 3 3 3 3 3 ...
 $ Deception      : num  3 1 1 1 1 2 1 2 2 2 ...
 $ Sociality      : num  3 3 1 3 1 3 1 3 3 3 ...
Code
# No NAs/NANs/Infs
table(is.na(ipcs_wide)) #0

 FALSE 
202375 
Code
sum(apply(ipcs_wide,2,is.nan)) #0
[1] 0
Code
sum(apply(ipcs_wide,2,is.infinite)) #0
[1] 0
Code
sum(ipcs_wide < 0) #0
[1] 0

p211 and 212 have dates messed up. once the time series goes into the new year (1/1), they have the wrong years

Code
which(ipcs_wide$SID == 212, arr.ind = TRUE) #rows 5067 - 5148
 [1] 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081
[16] 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096
[31] 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111
[46] 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126
[61] 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141
[76] 5142 5143 5144 5145 5146 5147 5148
Code
ipcs_wide[5067:5148,] #all p212
Code
ipcs_wide[5103:5148,] #p212 december measurement points
Code
year(ipcs_wide[5103:5148,]$Date) <- 2018

#check it
p212 <- ipcs_wide %>%
  filter(SID ==212)

Reverse Score Neuroticism

Code
ipcs_wide <- ipcs_wide %>%
  mutate(Depression = 100 - Depression,
         Anxiety = 100 - Anxiety,
         EmotionalVol = 100 - EmotionalVol) %>%
  rename(Depression_r = Depression,
         Anxiety_r = Anxiety,
         EmotionalVol_r = EmotionalVol)

Subset out the participant with 0 variance depression

Code
# First subset p171 into their own df -- 0 variance Depr
ipcs_wide_171 <- ipcs_wide %>%
  filter(SID == 171) %>%
  select(-Depression_r)

# ipcs without 171
ipcs_wide_sit <- ipcs_wide %>%
  filter(SID != 171)

ipcs_list_pre <- split(ipcs_wide_sit, f = ipcs_wide_sit$SID)

#nested
ipcs_nested_pre_no_profiles <- data.table::rbindlist(ipcs_list_pre, fill=TRUE) %>%
  group_by(SID) %>%
  nest() %>%
  ungroup() %>%
  arrange(SID)
Code
save(ipcs_profiles_id, file = "Data/ipcs_profiles_id.RData") 
save(ipcs_list_pre, file = "Data/ipcs_list_pre.RData") 
save(ipcs_wide_sit, file = "Data/ipcs_wide_sit.RData")
save(ipcs_nested_pre_no_profiles, file = "Data/ipcs_nested_pre_no_profiles.RData")